## packages: remove or add your necessary packages
required_packages <- c("tidyverse", "readxl", "ggthemes", "hrbrthemes", "extrafont", "plotly", "scales", "stringr", "gganimate", "here", "tidytext", "sentimentr", "scales", "DT", "here", "sm", "mblm", "glue", "fs", "knitr", "rmdformats", "janitor", "urltools", "colorspace", "pdftools")
library(ggplot2) # CRAN v3.3.6
library(colorspace) # CRAN v2.0-3
library(here) # CRAN v1.0.1
library(dplyr) # CRAN v1.0.10
library(janitor) # CRAN v2.1.0
library(gt) # CRAN v0.5.0
library(tidyr) # CRAN v1.2.1
library(readr) # CRAN v2.1.3
library(stringr) # CRAN v1.4.1
library(tidytext)
library(ggalt)
library(ggchicklet)
library(forcats)
library(ggfittext)
library(patchwork)
library(lubridate)
library(ggforce)
# for(i in required_packages) {
# if(!require(i, character.only = T)) {
#
# # if package is not existing, install then load the package
# install.packages(i, dependencies = T)
# require(i, character.only = T)
# }
# }
## save plots?
save <- TRUE
#save <- FALSE
## quality of png's
dpi <- 750
## font adjust; please adjust to client´s website
#extrafont::loadfonts(device = "win", quiet = TRUE)
#font_add_google("Montserrat", "Montserrat")
# font_add_google("Overpass", "Overpass")
# font_add_google("Overpass Mono", "Overpass Mono")
## theme updates; please adjust to client´s website
#theme_set(ggthemes::theme_clean(base_size = 15))
theme_set(ggthemes::theme_clean(base_size = 20, base_family = "Ubuntu"))
theme_update(plot.margin = margin(30, 30, 30, 30),
plot.background = element_rect(color = "white",
fill = "white"),
plot.title = element_text(size = 20,
face = "bold",
lineheight = 1.05,
hjust = .5,color="#ffac36",
margin = margin(10, 0, 25, 0)),
plot.title.position = "plot",
plot.caption = element_text(color = "grey40",
size = 9,
margin = margin(20, 0, -20, 0)),
plot.caption.position = "plot",
axis.line.x = element_line(color = "black",
size = .8),
axis.line.y = element_line(color = "black",
size = .8),
axis.title.x = element_text(size = 19,
face = "bold",
margin = margin(t = 20)),
axis.title.y = element_text(size = 19,
face = "bold",
margin = margin(r = 20)),
axis.text = element_text(size = 11,
color = "black",
face = "bold"),
axis.text.x = element_text(margin = margin(t = 10)),
axis.text.y = element_text(margin = margin(r = 10)),
axis.ticks = element_blank(),
panel.grid.major.x = element_line(size = .6,
color = "#eaeaea",
linetype = "solid"),
panel.grid.major.y = element_line(size = .6,
color = "#eaeaea",
linetype = "solid"),
panel.grid.minor.x = element_line(size = .6,
color = "#eaeaea",
linetype = "solid"),
panel.grid.minor.y = element_blank(),
panel.spacing.x = unit(4, "lines"),
panel.spacing.y = unit(2, "lines"),
legend.position = "top",
legend.title = element_text(family = "Ubuntu",
size = 14, color="black",
margin = margin(5, 0, 5, 0)),
legend.text = element_text(family = "Ubuntu",
color = "black",
size = 11,
margin = margin(4.5, 4.5, 4.5, 4.5)),
legend.background = element_rect(fill = NA,
color = NA),
legend.key = element_rect(color = NA, fill = NA),
#legend.key.width = unit(5, "lines"),
#legend.spacing.x = unit(.05, "pt"),
#legend.spacing.y = unit(.55, "pt"),
#legend.margin = margin(0, 0, 10, 0),
strip.text = element_text(face = "bold",
margin = margin(b = 10)))
## theme settings for flipped plots
theme_flip <-
theme(panel.grid.minor.x = element_blank(),
panel.grid.minor.y = element_line(size = .6,
color = "#eaeaea"))
## theme settings for maps
theme_map <-
theme_void(base_family = "Ubuntu") +
theme(legend.direction = "horizontal",
legend.box = "horizontal",
legend.margin = margin(10, 10, 10, 10),
legend.title = element_text(size = 17,
face = "bold"),
legend.text = element_text(color = "grey33",
size = 12),
plot.margin = margin(15, 5, 15, 5),
plot.title = element_text(face = "bold",
size = 20,
hjust = .5,
margin = margin(30, 0, 10, 0)),
plot.subtitle = element_text(face = "bold",
color = "grey33",
size = 17,
hjust = .5,
margin = margin(10, 0, -30, 0)),
plot.caption = element_text(size = 14,
color = "grey33",
hjust = .97,
margin = margin(-30, 0, 0, 0)))
## numeric format for labels
num_format <- scales::format_format(big.mark = ",", small.mark = ",", scientific = F)
## main color backlinko
bl_col <- "#00d188"
bl_dark <- darken(bl_col, .3, space = "HLS")
## colors + labels for interval stripes
int_cols <- c("#bce2d5", "#79d8b6", bl_col, "#009f66", "#006c45", "#003925")
int_perc <- c("100%", "95%", "75%", "50%", "25%", "5%")
## colors for degrees (Bachelors, Massters, Doctorate in reverse order)
cols_degree <- c("#e64500", "#FFCC00", darken(bl_col, .1))
## gradient colors for position
colfunc <- colorRampPalette(c(bl_col, "#bce2d5"))
pos_cols <- colfunc(10)youtube_data <- read_csv(here("proc_data","youtube_data_proc.csv"))
youtube_data_activities <- read_csv(here("proc_data","youtube_data_activities_proc.csv"))
tiktok_data <- read_csv(here("proc_data","tiktok_data_proc.csv"))
tiktok_data_activities <- read_csv(here("proc_data","tiktok_data_activities_proc.csv"))yt_vids <- youtube_data %>% distinct(yt_video_id) %>% nrow()
tt_vids <- tiktok_data %>% distinct(tt_video_id) %>% nrow()youtube_data_activities %>% group_by(yt_video_id) %>% summarise(idn=max(idea)) %>%
pull(idn) %>% {length(which(.>1))}-> mult_ideas_yt
tiktok_data_activities %>% group_by(tt_video_id) %>% summarise(idn=max(idea)) %>%
pull(idn) %>% {length(which(.>1))} -> mult_ideas_tt
meanytlength <- youtube_data$video_length %>% summary %>% {./60}
meanttlength <- tiktok_data$video_meta_duration %>% summary YouTube: 177 videos (unique video url identifiers,
includes YT shorts)
TikTok: 177 videos
YouTube videos are longer (12.15141 minutes on average for the sampled videos), so approximately one third of the videos examined (53/177) included >1 money-making idea. TikTok videos have a shorter maximum length (3 to 10 minutes; 41.14384 seconds on average for the sampled videos) so videos on this platform tend to feature a single idea. Only 4 of the 145 TikTok videos examined provided more than one money-making idea.
youtube_data <- youtube_data %>% mutate(month=month(ymd(youtube_data$publish_date)),
pyear=year(ymd(youtube_data$publish_date))) %>%
mutate(pub_date=ymd(publish_date))
tiktok_data <- tiktok_data %>% mutate(month=month(ymd_hms(tiktok_data$create_time_iso)), pyear=year(ymd_hms(tiktok_data$create_time_iso))) %>%
mutate(pub_date=date(ymd_hms(create_time_iso))) 3/4 of the YouTube videos examined were published in 2022, and across all the videos sampled (published since 2018), most are from the summer/fall season (Northern Hemisphere).
TikTok videos in the sample were published between 2019-2022, with more videos uploaded with each passing year. The month with most uploads is July.
tiktok_data %>% tabyl(pyear) %>% round(2)## pyear n percent
## 2019 4 0.03
## 2020 29 0.20
## 2021 50 0.34
## 2022 63 0.43
Publication month also varied between platforms.
youtube_data %>% count(month) %>%
ggplot()+
geom_bar(aes(x=month,y=n),stat = "identity")+
scale_x_discrete(limits=month.abb) +labs(subtitle = "YouTube data")tiktok_data %>% count(month) %>%
ggplot()+
geom_bar(aes(x=month,y=n),stat = "identity")+
scale_x_discrete(limits=month.abb) +labs(subtitle = "TikTok data")Considering publication dates, videos published earlier do not tend to accumulate more views and comments over time. Engagement is also mostly unrelated to subscriber/follower counts and thus possibly related to content.
ttdatevc <- tiktok_data %>% select(source,pub_date,
comments=comment_count,
views=play_count,
followers=author_meta_fans)
ytdatevc <- youtube_data %>% select(source,pub_date,
comments=comments,
views=view_count,
followers=subs_numeric)
dates_views_comments <- bind_rows(ttdatevc,ytdatevc)
ggplot(dates_views_comments)+
geom_point(aes(x=pub_date,y=views,color=source))+
labs(x="Publication date")ggplot(dates_views_comments)+
geom_point(aes(x=pub_date,y=comments,color=source))+
labs(x="Publication date")ggplot(dates_views_comments)+
geom_point(aes(x=views,y=comments,color=source))ggplot(dates_views_comments)+
geom_point(aes(x=followers,y=comments,color=source))dates_views_comments %>% filter(followers!=44100000) %>%
ggplot()+
geom_point(aes(x=followers,y=comments,color=source))+
labs(subtitle = "removed outlier")ggplot(dates_views_comments)+
geom_point(aes(x=followers,y=views,color=source))dates_views_comments %>% filter(followers!=44100000) %>%
ggplot()+
geom_point(aes(x=followers,y=views,color=source))+
labs(subtitle = "removed outlier")yt_presenter_demog_gend <- youtube_data %>% tabyl(presenter_gender) %>%
mutate(valid_percent=round(valid_percent,2)) %>% mutate(source="YouTube")
yt_malepct <- yt_presenter_demog_gend$valid_percent[2]
tt_presenter_demog_gend <- tiktok_data %>% tabyl(presenter_gender) %>%
mutate(valid_percent=round(valid_percent,2)) %>% mutate(source="TikTok")
tt_malepct <- tt_presenter_demog_gend$valid_percent[2]
yt_ages <- youtube_data %>% tabyl(presenter_age) %>% na.omit() %>% select(-percent) %>% mutate(percent=round(valid_percent,2)) %>% select(-valid_percent) %>% mutate(source="YouTube")
tt_ages <- tiktok_data %>% tabyl(presenter_age) %>% na.omit() %>% select(-percent) %>% mutate(percent=round(valid_percent,2)) %>% select(-valid_percent)%>% mutate(source="TikTok")Male presenters were represented more on both platforms (YouTube: 0.86% and TikTok 0.8%), and the 20-30 y.o. age category had the highest proportion with ~40%.
gt(yt_ages) %>% tab_header("YouTube")| YouTube | |||
|---|---|---|---|
| presenter_age | n | percent | source |
| 10 - 20 | 6 | 0.03 | YouTube |
| 20 - 30 | 76 | 0.43 | YouTube |
| 30 - 40 | 49 | 0.28 | YouTube |
| 40 - 50 | 4 | 0.02 | YouTube |
| 50+ | 3 | 0.02 | YouTube |
| Voice-over | 20 | 0.11 | YouTube |
| Voice-over Text-to-Speech | 18 | 0.10 | YouTube |
gt(tt_ages) %>% tab_header("TikTok")| TikTok | |||
|---|---|---|---|
| presenter_age | n | percent | source |
| 10 - 20 | 14 | 0.10 | TikTok |
| 20 - 30 | 57 | 0.40 | TikTok |
| 30 - 40 | 24 | 0.17 | TikTok |
| 40 - 50 | 5 | 0.04 | TikTok |
| 50+ | 2 | 0.01 | TikTok |
| Music | 8 | 0.06 | TikTok |
| Voice-over | 6 | 0.04 | TikTok |
| Voice-over Text-to-Speech | 25 | 0.18 | TikTok |
gendplt <-
bind_rows(tt_presenter_demog_gend,yt_presenter_demog_gend) %>% na.omit() %>%
ggplot(aes(x=source,y=valid_percent,fill=presenter_gender,label=presenter_gender))+
geom_chicklet()+
labs(x="Video Source",y="presenter representation")+
geom_bar_text(family="Ubuntu",fullheight = T,size=20)+
scale_fill_manual(values=c("#003eaa","#46A5FF"),guide="none")
ageplt <-
bind_rows(yt_ages,tt_ages) %>%
filter(str_detect(presenter_age,"^[0-9]")) %>%
ggplot(aes(x=fct_reorder(presenter_age,percent),y=n,fill=source,color=source))+
geom_chicklet(position = "dodge")+
labs(y="Number of videos",x="Presenter Age",
caption = "Excludes text-to-speech or voice-over presenters")+
scale_fill_manual(values = c("#00f2ea","#FF0000"),name="Video Source")+
scale_color_manual(values=c("#ff0050","#282828"),name="Video Source")+
coord_flip()
ageplt+gendpltYouTube videos, as categorized by their authors, varied in assignment despite the similar overarching topic.
The most common category was Education, followed by How-to % Stlye, and then all the others.
youtube_data %>% tabyl(category) %>% arrange(-n) %>%
mutate(across(where(is.numeric),round,2)) %>% gt() %>% tab_header(title = "YouTube data")| YouTube data | ||
|---|---|---|
| category | n | percent |
| Education | 96 | 0.54 |
| Howto & Style | 46 | 0.26 |
| People & Blogs | 27 | 0.15 |
| Entertainment | 7 | 0.04 |
| News & Politics | 1 | 0.01 |
ytearn <-
youtube_data_activities %>%
mutate(earnings_1_unit=earnings/earnings_timeframe_number) %>%
group_by(yt_video_id,idea,earnings_timeframe) %>%
summarise(earn=mean(earnings_1_unit,na.rm=TRUE)) %>% ungroup() %>% na.omit() %>%
filter(earnings_timeframe!="No timeframe provided")
ttearn <-
tiktok_data_activities %>%
mutate(earnings_1_unit=earnings/earnings_timeframe_number) %>%
group_by(tt_video_id,idea,earnings_timeframe) %>%
summarise(earn=mean(earnings_1_unit,na.rm=TRUE)) %>% ungroup() %>% na.omit() %>%
filter(earnings_timeframe!="No timeframe provided")
# earnings time frames
earn_tf <- bind_rows(ytearn,ttearn) %>% count(earnings_timeframe) %>% arrange(-n)
earnings_by_tf <-
bind_rows(ytearn,ttearn) %>% group_by(earnings_timeframe) %>%
summarize(median_earn=median(earn),
min_earn=min(earn),max_earn=max(earn),
sd_earn=sd(earn,na.rm = TRUE)) %>% arrange(-median_earn)gt(earn_tf)| earnings_timeframe | n |
|---|---|
| Days | 67 |
| Months | 65 |
| Hours | 56 |
| Minutes | 20 |
| Weeks | 9 |
| Years | 5 |
| Per Post | 1 |
gt(earnings_by_tf)%>%
fmt_number(
columns = -earnings_timeframe,
decimals = 1,
use_seps = FALSE
)| earnings_timeframe | median_earn | min_earn | max_earn | sd_earn |
|---|---|---|---|---|
| Years | 100000.0 | 500.0 | 400000.0 | 155449.2 |
| Months | 5000.0 | 15.0 | 300000.0 | 50262.5 |
| Weeks | 1050.0 | 24.0 | 14000.0 | 4357.9 |
| Days | 500.0 | 2.6 | 7000.0 | 1332.4 |
| Hours | 35.5 | 3.0 | 487.9 | 110.4 |
| Per Post | 33.3 | 33.3 | 33.3 | NA |
| Minutes | 5.1 | 0.1 | 30.0 | 6.7 |
temporal_earn <- c("Days","Hours","Minutes","Months","Weeks","Years")
yt_tempearn <- ytearn %>% filter(earnings_timeframe %in% temporal_earn)
tt_tempearn <- ttearn %>% filter(earnings_timeframe %in% temporal_earn)
yt_hourly_earn <-
yt_tempearn %>% mutate(hourly_earn=case_when(
earnings_timeframe=="Hours"~earn,
earnings_timeframe=="Minutes"~earn*60,
earnings_timeframe=="Days"~earn/8,
earnings_timeframe=="Weeks"~earn/40,
earnings_timeframe=="Months"~earn/200,
earnings_timeframe=="Years"~earn/2400
)) %>% mutate(source="YouTube")
tt_hourly_earn <-
tt_tempearn %>% mutate(hourly_earn=case_when(
earnings_timeframe=="Hours"~earn,
earnings_timeframe=="Minutes"~earn*60,
earnings_timeframe=="Days"~earn/8,
earnings_timeframe=="Weeks"~earn/40,
earnings_timeframe=="Months"~earn/200,
earnings_timeframe=="Years"~earn/2400
)) %>% mutate(source="TikTok")
all_earn <- bind_rows(yt_hourly_earn,tt_hourly_earn)
tt_h_earnsum <-
tt_hourly_earn %>% summarize(mean_earn=mean(hourly_earn),
median_earn=median(hourly_earn))
yt_h_earnsum <-
yt_hourly_earn %>% summarize(mean_earn=mean(hourly_earn),
median_earn=median(hourly_earn))
hourly_med <- median(all_earn$hourly_earn)
bind_rows(yt_hourly_earn,tt_hourly_earn) %>%
ggplot()+
geom_histogram(aes(hourly_earn,fill=source,color=source),
alpha=0.7,linewidth=0.6,bins=25)+
labs(x="Hourly earnings ($)",
y="Number of money-making ideas",
title="Distribution of standardized hourly earnings")+
scale_fill_manual(values = c("#00f2ea","#FF0000"),name="Video Source")+
scale_color_manual(values=c("#ff0050","#282828"),name="Video Source")+
theme(plot.title = element_text(color = "#ffac36"),
legend.position = "bottom",text = element_text(family='Ubuntu'))
For videos that report earnings associated with a temporal reference ($
earned per unit of time), earnings can be reported in a common unit by
assuming 8 hour work days and 5 day work weeks. The median hourly
earnings is 50.
Once standardized, the timeframe with the highest median earnings was minutes. For this time frame, 18 of 20 videos suggest that it is possible to make between 1 and 30 USD per minute with their ideas. This becomes up to $1800 per hour.
all_earn %>% group_by(earnings_timeframe) %>%
summarize(median_earn=median(hourly_earn),
min_earn=min(hourly_earn),max_earn=max(hourly_earn),
sd_earn=sd(hourly_earn,na.rm = TRUE)) %>% arrange(-median_earn) %>% gt() %>%
fmt_number(
columns = -earnings_timeframe,
decimals = 1,
use_seps = FALSE
)| earnings_timeframe | median_earn | min_earn | max_earn | sd_earn |
|---|---|---|---|---|
| Minutes | 306.0 | 3.0 | 1800.0 | 403.6 |
| Days | 62.5 | 0.3 | 875.0 | 166.6 |
| Years | 41.7 | 0.2 | 166.7 | 64.8 |
| Hours | 35.5 | 3.0 | 487.9 | 110.4 |
| Weeks | 26.2 | 0.6 | 350.0 | 108.9 |
| Months | 25.0 | 0.1 | 1500.0 | 251.3 |
Across all videos, earnings are right-skewed. 90% of videos report hourly earnings < 384.138.
std hourly earnings varied by platform
This distribution is also evident within earnings timeframes.
bind_rows(yt_hourly_earn,tt_hourly_earn) %>%
ggplot()+
geom_histogram(aes(earn))+
facet_wrap(~earnings_timeframe,scales = 'free')The more common categories (Education, Howto & Style) did not report the higher mean or median standardized earnings. Instead, the People and Blogs category and Entertainment had the top two positions.
yt_hourlycorrs <- left_join(yt_hourly_earn,youtube_data_activities)
yt_hourlycorrs_chp <- yt_hourlycorrs %>% group_by(yt_video_id,idea) %>%
chop(business_type_level_1) %>% chop(business_type_level_2) %>% chop(skills_required) %>% ungroup()yt_hourlycorrs_chp %>%
group_by(category) %>% summarise(mean_earn=mean(hourly_earn),
med_earn=median(hourly_earn)) %>%
arrange(-mean_earn) %>% gt()| category | mean_earn | med_earn |
|---|---|---|
| People & Blogs | 386.75750 | 168.75 |
| Entertainment | 127.05000 | 125.00 |
| Howto & Style | 105.40231 | 80.00 |
| Education | 98.01758 | 40.00 |
yt_hourlycorrs_chp %>%
ggplot(aes(x=category,y=hourly_earn,color=category))+
geom_sina() + scale_color_discrete(guide="none")For all YouTube videos, the predominant Business Type for the money-making ideas was Publication, Media, and Blogs, followed by the Service Business. Other business types were less common.
# without earnings
yt_acts_chp <- youtube_data_activities %>% group_by(yt_video_id,idea) %>%
chop(business_type_level_1) %>% chop(business_type_level_2) %>% chop(skills_required) %>% ungroup()
yt_bus1 <- youtube_data_activities %>% group_by(yt_video_id,idea) %>%
unchop(business_type_level_1) %>% ungroup()
bus1ct <- yt_bus1 %>% group_by(yt_video_id,idea) %>%
distinct(yt_video_id,idea,business_type_level_1) %>%
tabyl(business_type_level_1) %>% arrange(-n)
bus1ct %>% na.omit() %>% mutate(percent=round(valid_percent,2)) %>% select(-valid_percent) %>%
gt() %>% tab_header("YouTube",subtitle = "Business Types, all videos") | YouTube | ||
|---|---|---|
| Business Types, all videos | ||
| business_type_level_1 | n | percent |
| Publication, Media & Blog | 175 | 0.45 |
| Service Business | 122 | 0.31 |
| Ecommerce & Consumer | 56 | 0.14 |
| Investing | 27 | 0.07 |
| Software & Tech | 9 | 0.02 |
For videos and ideas with reported earnings, the business activity with the highest earnings (standardized) was Publication, Media, & Blog, followed by investing.
# with earninings
yt_hourlycorrs_bus1 <- yt_hourlycorrs %>% group_by(yt_video_id,idea) %>%
unchop(business_type_level_1) %>% ungroup()
yt_hourlycorrs_bus1 %>% group_by(yt_video_id,idea) %>%
distinct(yt_video_id,idea,business_type_level_1,hourly_earn) %>% ungroup() %>%
group_by(business_type_level_1) %>%
summarise(mean_earn=mean(hourly_earn),
median_earn=median(hourly_earn)) %>%
arrange(-median_earn) %>% gt() %>% tab_header("YouTube",
subtitle = "standardized hourly earning by business types")| YouTube | ||
|---|---|---|
| standardized hourly earning by business types | ||
| business_type_level_1 | mean_earn | median_earn |
| Publication, Media & Blog | 163.73000 | 70.00000 |
| Investing | 31.20833 | 26.25000 |
| Service Business | 76.22654 | 25.00000 |
| Software & Tech | 20.00000 | 20.00000 |
| Ecommerce & Consumer | 48.28748 | 16.77083 |
However, there is considerable variation in earnings across the
different business types
yt_hourlycorrs_bus1 %>% group_by(yt_video_id,idea) %>%
distinct(yt_video_id,idea,business_type_level_1,hourly_earn) %>% ungroup %>%
ggplot()+
geom_sina(aes(x=str_wrap(business_type_level_1,12),
y=hourly_earn,color=business_type_level_1))+
scale_color_discrete(guide="none")+labs(x="Business Type (level 1)")yt_bus2 <- youtube_data_activities %>% group_by(yt_video_id,idea) %>%
unchop(business_type_level_2) %>% ungroup()
n_bus2 <- youtube_data_activities %>% distinct(business_type_level_2) %>% nrow()
maxnbus2 <- yt_bus2 %>% group_by(yt_video_id,idea) %>%
distinct(yt_video_id,idea,business_type_level_2) %>% ungroup() %>% group_by(yt_video_id,idea) %>% summarise(nbus2=n()) %>% arrange(-nbus2) %>% pull(nbus2) %>% max()The second-level classification of Business Activities for making money includes many more categories (51). Many combinations of Business Types were possible for each video/idea, but none included more than 3.
At this level no particular business type predominated, none represented >20% of suggested activities. The most frequent business type was Publication, Media & Blog - Affiliate Marketing, followed by Service Business - Microwork/tasks (e.g. Taking surveys, searching for data, tagging content and identifying images). Other types were much less common.
bus2ct <- yt_bus2 %>% group_by(yt_video_id,idea) %>%
distinct(yt_video_id,idea,business_type_level_2) %>%
tabyl(business_type_level_2) %>% arrange(-n)
bus2ct %>% na.omit() %>% mutate(percent=round(valid_percent,2)) %>% select(-valid_percent) %>% filter(n>1) %>%
gt() %>% tab_header("YouTube",subtitle = "Business Types (LEVEL 2), all videos") %>% tab_footnote(footnote = "n=1 not shown",
locations = cells_column_labels(
columns = n
)) | YouTube | ||
|---|---|---|
| Business Types (LEVEL 2), all videos | ||
| business_type_level_2 | n1 | percent |
| Publication, Media & Blog - Affiliate Marketing | 86 | 0.19 |
| Service Business - Microwork/tasks (e.g. Taking surveys, searching for data, tagging content and identifying images) | 64 | 0.14 |
| Publication, Media & Blog - YouTube | 35 | 0.08 |
| Service Business - Other freelance (e.g. on Upwork, Fiverr) | 25 | 0.06 |
| Service Business - Agency (e.g. Social Media Marketing, SEO, etc.) | 19 | 0.04 |
| Ecommerce & Consumer - Online shop on marketplaces (e.g. Ebay, Etsy) | 18 | 0.04 |
| Publication, Media & Blog - Courses | 17 | 0.04 |
| Ecommerce & Consumer - Dropshipping | 14 | 0.03 |
| Ecommerce & Consumer - Online Shop | 14 | 0.03 |
| Investing - Crypto | 13 | 0.03 |
| Publication, Media & Blog - Write a blog | 11 | 0.02 |
| Ecommerce & Consumer - Amazon FBA | 10 | 0.02 |
| Investing - Stocks | 10 | 0.02 |
| Ecommerce & Consumer - Sell belongings on marketplaces (e.g. Ebay, Craigslist, Etsy) | 9 | 0.02 |
| Investing - Real estate investing (e.g. House flipping/ Crowdfunding) | 8 | 0.02 |
| Publication, Media & Blog - Selling Digital Products (EBooks/Print On Demand etc)) | 8 | 0.02 |
| Publication, Media & Blog - Content Creator | 7 | 0.02 |
| Publication, Media & Blog - Influencer | 5 | 0.01 |
| Publication, Media & Blog - Youtube/Rumble/Pinterest Video Automation | 5 | 0.01 |
| Service Business - Become a Virtual Assistant | 5 | 0.01 |
| Software & Tech - Create A Website | 5 | 0.01 |
| Service Business - Freelance Writer | 4 | 0.01 |
| Ecommerce & Consumer - Facebook Marketplace | 3 | 0.01 |
| Publication, Media & Blog - Paid Community (Patreon) | 3 | 0.01 |
| Publication, Media & Blog - Spinning Articles | 3 | 0.01 |
| Publication, Media & Blog - Youtube Sponsorships | 3 | 0.01 |
| Service Business - Rent out stuff (e.g. storage space, truck) | 3 | 0.01 |
| Publication, Media & Blog - Image Sharing | 2 | 0.00 |
| Publication, Media & Blog - Membership Sites | 2 | 0.00 |
| Publication, Media & Blog - Newsletter/ Articles | 2 | 0.00 |
| Publication, Media & Blog - NFT's | 2 | 0.00 |
| Publication, Media & Blog - Podcasting | 2 | 0.00 |
| Publication, Media & Blog - Write a book | 2 | 0.00 |
| Service Business - Home Delivery Services | 2 | 0.00 |
| Service Business - Home Services (e.g. Power Washing, Pet sitting) | 2 | 0.00 |
| Service Business - Video Editor | 2 | 0.00 |
| Software & Tech - Create a mobile app | 2 | 0.00 |
| Software & Tech - Create a software | 2 | 0.00 |
| Software & Tech - Create Templates | 2 | 0.00 |
| 1 n=1 not shown | ||
The Business type with the highest mean standardized earnings was Publication, Media & Blog - Newsletter/ Articles, followed by Ecommerce & Consumer - Dropshipping and other types in the Publication/Media/Blogging fields.
yt_hourlycorrs_bus2 <- yt_hourlycorrs %>% group_by(yt_video_id,idea) %>%
unchop(business_type_level_2) %>% ungroup()
yt_hourlycorrs_bus2 %>% group_by(yt_video_id,idea) %>%
distinct(yt_video_id,idea,business_type_level_2,hourly_earn) %>% ungroup() %>%
group_by(business_type_level_2) %>%
summarise(mean_earn=mean(hourly_earn),
median_earn=median(hourly_earn)) %>%
arrange(-median_earn) %>% gt() %>% tab_header("YouTube, Business Type Level 2",
subtitle = "standardized hourly earning by business types")| YouTube, Business Type Level 2 | ||
|---|---|---|
| standardized hourly earning by business types | ||
| business_type_level_2 | mean_earn | median_earn |
| Publication, Media & Blog - Write a blog | 925.00000 | 925.00000 |
| Publication, Media & Blog - Newsletter/ Articles | 312.50000 | 312.50000 |
| Ecommerce & Consumer - Dropshipping | 255.56820 | 255.56820 |
| Publication, Media & Blog - Youtube Sponsorships | 230.62500 | 230.62500 |
| Publication, Media & Blog - Spinning Articles | 718.33333 | 180.00000 |
| Service Business - Sales Representative/Advisor | 170.00000 | 170.00000 |
| Publication, Media & Blog - Affiliate Marketing | 179.31921 | 105.65000 |
| Publication, Media & Blog - Youtube/Rumble/Pinterest Video Automation | 105.00000 | 105.00000 |
| Publication, Media & Blog - YouTube | 118.58227 | 75.00000 |
| Publication, Media & Blog - Membership Sites | 70.00000 | 70.00000 |
| Service Business - Rent out stuff (e.g. storage space, truck) | 68.75000 | 68.75000 |
| Investing - Crypto | 62.50000 | 62.50000 |
| Publication, Media & Blog - Selling Digital Products (EBooks/Print On Demand etc)) | 58.42406 | 56.25000 |
| Publication, Media & Blog - Content Creator | 50.00000 | 50.00000 |
| Software & Tech - Create A Website | 47.50000 | 47.50000 |
| Service Business - Home Services (e.g. Power Washing, Pet sitting) | 45.62500 | 45.62500 |
| Service Business - Other freelance (e.g. on Upwork, Fiverr) | 264.55625 | 40.62500 |
| Service Business - Agency (e.g. Social Media Marketing, SEO, etc.) | 47.08333 | 37.50000 |
| Service Business - Become a Virtual Assistant | 40.62500 | 35.00000 |
| Investing - Real estate investing (e.g. House flipping/ Crowdfunding) | 31.35417 | 31.35417 |
| Service Business - Freelance Writer | 40.50000 | 30.00000 |
| Service Business - Microwork/tasks (e.g. Taking surveys, searching for data, tagging content and identifying images) | 113.01125 | 28.50000 |
| Publication, Media & Blog - NFT's | 26.25000 | 26.25000 |
| Publication, Media & Blog - Creating Spotify Ads | 25.00000 | 25.00000 |
| Service Business - Home Delivery Services | 22.41667 | 22.41667 |
| Publication, Media & Blog - Courses | 77.02381 | 20.00000 |
| Ecommerce & Consumer - Online shop on marketplaces (e.g. Ebay, Etsy) | 20.84028 | 16.77083 |
| Service Business - Video Editor | 15.00000 | 15.00000 |
| Publication, Media & Blog - Paid Community (Patreon) | 27.30518 | 10.00000 |
| Service Business - Data Entry | 10.00000 | 10.00000 |
| Ecommerce & Consumer - Online Shop | 9.09500 | 9.09500 |
| Investing - Stocks | 22.33333 | 4.00000 |
The five business types at this level with n>6 show a wide range of earnings.
yt_hourlycorrs_bus2 %>% group_by(yt_video_id,idea) %>%
distinct(yt_video_id,idea,business_type_level_2,hourly_earn) %>% ungroup %>% add_count(business_type_level_2) %>% filter(n>6) %>%
ggplot()+
geom_sina(aes(x=str_wrap(business_type_level_2,33),
y=hourly_earn,color=business_type_level_2))+
scale_color_discrete(guide="none")+labs(x="Business Type (level 1)")+
coord_flip()+labs(x="Business Type Level 2")For TikTok videos, the predominant Business Type for the money-making ideas was Service Business with almost 50% of videos, followed by the Ecommerce & Consumer ventures. Other business types were less common.
# tt without earnings
tt_acts_chp <- tiktok_data_activities %>% group_by(tt_video_id,idea) %>%
chop(business_type_level_1) %>% chop(business_type_level_2) %>% chop(skills_required) %>% ungroup()
tt_bus1 <- tiktok_data_activities %>% group_by(tt_video_id,idea) %>%
unchop(business_type_level_1) %>% ungroup()
tt_bus1ct <- tt_bus1 %>% group_by(tt_video_id,idea) %>%
distinct(tt_video_id,idea,business_type_level_1) %>%
tabyl(business_type_level_1) %>% arrange(-n)
tt_bus1ct %>% na.omit() %>% mutate(percent=round(valid_percent,2)) %>% select(-valid_percent) %>%
gt() %>% tab_header("TikTok",subtitle = "Business Types, all videos") | TikTok | ||
|---|---|---|
| Business Types, all videos | ||
| business_type_level_1 | n | percent |
| Service Business | 74 | 0.49 |
| Ecommerce & Consumer | 36 | 0.24 |
| Publication, Media & Blog | 28 | 0.19 |
| Investing | 12 | 0.08 |
| Software & Tech | 1 | 0.01 |
For videos and ideas with reported earnings, the business activity with the highest earnings (standardized) was Investing, followed by Ecommerce & Consumer
# tt with earninings
tt_hourlycorrs <- left_join(tt_hourly_earn,tiktok_data_activities)
tt_hourlycorrs_chp <- tt_hourlycorrs %>% group_by(tt_video_id,idea) %>%
chop(business_type_level_1) %>% chop(business_type_level_2) %>% chop(skills_required) %>% ungroup()
tt_hourlycorrs_bus1 <- tt_hourlycorrs %>% group_by(tt_video_id,idea) %>%
unchop(business_type_level_1) %>% ungroup()
tt_hourlycorrs_bus1 %>% group_by(tt_video_id,idea) %>%
distinct(tt_video_id,idea,business_type_level_1,hourly_earn) %>% ungroup() %>%
group_by(business_type_level_1) %>%
summarise(mean_earn=mean(hourly_earn),
median_earn=median(hourly_earn)) %>%
arrange(-median_earn) %>% gt()| business_type_level_1 | mean_earn | median_earn |
|---|---|---|
| Ecommerce & Consumer | 227.55849 | 134.90062 |
| Investing | 82.22282 | 56.11292 |
| Service Business | 102.44798 | 40.00000 |
| Publication, Media & Blog | 261.69231 | 37.50000 |
| Software & Tech | 1.48500 | 1.48500 |
However, with some exceptions, earnings do not vary considerably across the different business types
tt_hourlycorrs_bus1 %>% group_by(tt_video_id,idea) %>%
distinct(tt_video_id,idea,business_type_level_1,hourly_earn) %>% ungroup %>%
ggplot()+
geom_jitter(aes(x=str_wrap(business_type_level_1,12),
y=hourly_earn,color=business_type_level_1))+
scale_color_discrete(guide="none")+labs(x="Business Type (level 1)",subtitle = "TikTok")# 1 saving plots in pdf with example
# ggplot(data = mpg, mapping = aes(x = displ, y = hwy, color= drv)) +
# geom_smooth(mapping = aes(linetype = drv), method = 'loess') +
# geom_point()
#
# if(save == T){
# ggsave(here::here("plots", "name_plot.pdf"),
# width = 12.5, height = 8, device = cairo_pdf)
# }
# 2 pdfs will then be converted into the pngs using the 04_convert_pdfs_to_pngs.rmd file. View counts, comments, followers, and standardized earnings are not tightly associated.
yt_hourly_renamed <- yt_hourlycorrs_chp %>%
select(earn=hourly_earn,views=view_count,source,comments,followers=subs_numeric)
tt_hourly_renamed <- tt_hourlycorrs_chp %>%
select(earn=hourly_earn,views=play_count,source,comments=comment_count,
followers=author_meta_fans)
hourlyboth <- bind_rows(yt_hourly_renamed,tt_hourly_renamed)
ggplot(hourlyboth)+aes(x=views,y=earn,color=source)+geom_point()ggplot(hourlyboth)+aes(x=comments,y=earn,color=source)+geom_point()ggplot(hourlyboth)+aes(x=followers,y=earn,color=source)+geom_point()n_bus2tt <- tiktok_data_activities %>% distinct(business_type_level_2) %>% nrow()
tt_bus2 <- tiktok_data_activities %>% group_by(tt_video_id,idea) %>%
unchop(business_type_level_2) %>% ungroup()
maxnbus2tt <- tt_bus2 %>% group_by(tt_video_id,idea) %>%
distinct(tt_video_id,idea,business_type_level_2) %>% ungroup() %>% group_by(tt_video_id,idea) %>% summarise(nbus2=n()) %>% arrange(-nbus2) %>% pull(nbus2) %>% max()TikTok videos mentioned fewer (33) Business Activities in this level than Youtube videos. Many combinations of Business Types were possible for each video/idea, but none included more than 3.
No particular business type predominated, none represented >20% of suggested activities. The most frequent business type was Service Business - Microwork/tasks (e.g. Taking surveys, searching for data, tagging content and identifying images), followed by Ecommerce & Consumer - Online shop on marketplaces (e.g. Ebay, Etsy). Other types were much less common.
bus2ctt <- tt_bus2 %>% group_by(tt_video_id,idea) %>%
distinct(tt_video_id,idea,business_type_level_2) %>%
tabyl(business_type_level_2) %>% arrange(-n)
bus2ctt %>% na.omit() %>% mutate(percent=round(valid_percent,2)) %>% select(-valid_percent) %>% filter(n>1) %>%
gt() %>% tab_header("TikTok",subtitle = "Business Types (LEVEL 2), all videos") %>% tab_footnote(footnote = "n=1 not shown",
locations = cells_column_labels(
columns = n
)) | TikTok | ||
|---|---|---|
| Business Types (LEVEL 2), all videos | ||
| business_type_level_2 | n1 | percent |
| Service Business - Microwork/tasks (e.g. Taking surveys, searching for data, tagging content and identifying images) | 30 | 0.16 |
| Ecommerce & Consumer - Online shop on marketplaces (e.g. Ebay, Etsy) | 16 | 0.09 |
| Ecommerce & Consumer - Dropshipping | 14 | 0.08 |
| Service Business - Agency (e.g. Social Media Marketing, SEO, etc.) | 10 | 0.05 |
| Publication, Media & Blog - Affiliate Marketing | 9 | 0.05 |
| Service Business - Other freelance (e.g. on Upwork, Fiverr) | 9 | 0.05 |
| Ecommerce & Consumer - Amazon FBA | 8 | 0.04 |
| Investing - Real estate investing (e.g. House flipping/ Crowdfunding) | 8 | 0.04 |
| Ecommerce & Consumer - Online Shop | 7 | 0.04 |
| Publication, Media & Blog - Selling Digital Products (EBooks/Print On Demand etc)) | 7 | 0.04 |
| Publication, Media & Blog - YouTube | 6 | 0.03 |
| Service Business - Sales Representative/Advisor | 5 | 0.03 |
| Service Business - Vending Machine | 5 | 0.03 |
| Ecommerce & Consumer - Facebook Marketplace | 4 | 0.02 |
| Publication, Media & Blog - Image Sharing | 4 | 0.02 |
| Publication, Media & Blog - Youtube/Rumble/Pinterest Video Automation | 4 | 0.02 |
| Service Business - Become a Virtual Assistant | 4 | 0.02 |
| Service Business - Home Delivery Services | 4 | 0.02 |
| Service Business - Home Services (e.g. Power Washing, Pet sitting) | 4 | 0.02 |
| Ecommerce & Consumer - Sell belongings on marketplaces (e.g. Ebay, Craigslist, Etsy) | 3 | 0.02 |
| Investing - Stocks | 3 | 0.02 |
| Publication, Media & Blog - NFT's | 3 | 0.02 |
| Service Business - Freelance Writer | 3 | 0.02 |
| Service Business - Rent out room (e.g. Airbnb) | 3 | 0.02 |
| Service Business - Car Wash | 2 | 0.01 |
| Service Business - Furniture Flipping | 2 | 0.01 |
| Service Business - Rent out stuff (e.g. storage space, truck) | 2 | 0.01 |
| 1 n=1 not shown | ||
The Business type with the highest mean standardized earnings was Publication, Media & Blog - Write a blog (although this result is driven by a single video stating $300000 in monthly earnings through this method). After that Service Business - Furniture Flipping and Investing - Stocks have similar earnings, followed by various others.
tt_hourlycorrs_bus2 <- tt_hourlycorrs %>% group_by(tt_video_id,idea) %>%
unchop(business_type_level_2) %>% ungroup()
tt_hourlycorrs_bus2 %>% group_by(tt_video_id,idea) %>%
distinct(tt_video_id,idea,business_type_level_2,hourly_earn) %>% ungroup() %>%
group_by(business_type_level_2) %>%
summarise(mean_earn=mean(hourly_earn),
median_earn=median(hourly_earn)) %>%
arrange(-median_earn) %>% gt() %>% tab_header("TikTok, Business Type Level 2",
subtitle = "standardized hourly earning by business types")| TikTok, Business Type Level 2 | ||
|---|---|---|
| standardized hourly earning by business types | ||
| business_type_level_2 | mean_earn | median_earn |
| Publication, Media & Blog - Write a blog | 1500.0000000 | 1500.0000000 |
| Ecommerce & Consumer - Online shop on marketplaces (e.g. Ebay, Etsy) | 245.8878889 | 150.0000000 |
| Service Business - Furniture Flipping | 140.6250000 | 140.6250000 |
| Investing - Real estate investing (e.g. House flipping/ Crowdfunding) | 135.0000000 | 135.0000000 |
| Ecommerce & Consumer - Amazon FBA | 195.7437500 | 119.8012500 |
| Ecommerce & Consumer - Online Shop | 100.0000000 | 100.0000000 |
| Service Business - Agency (e.g. Social Media Marketing, SEO, etc.) | 73.8194444 | 75.0000000 |
| Service Business - Other freelance (e.g. on Upwork, Fiverr) | 79.8809524 | 75.0000000 |
| Service Business - Home Delivery Services | 63.4843750 | 63.4843750 |
| Service Business - Microwork/tasks (e.g. Taking surveys, searching for data, tagging content and identifying images) | 160.4275000 | 61.5000000 |
| Investing - Stocks | 55.8342361 | 55.8342361 |
| Ecommerce & Consumer - Dropshipping | 184.4657571 | 50.0000000 |
| Service Business - Car Wash | 50.0000000 | 50.0000000 |
| Service Business - Home Services (e.g. Power Washing, Pet sitting) | 45.0000000 | 40.0000000 |
| Publication, Media & Blog - Affiliate Marketing | 326.0000000 | 37.5000000 |
| Publication, Media & Blog - YouTube | 501.5000000 | 37.5000000 |
| Publication, Media & Blog - Youtube/Rumble/Pinterest Video Automation | 323.3333333 | 37.5000000 |
| Service Business - Freelance Writer | 68.0555556 | 30.0000000 |
| Publication, Media & Blog - Selling Digital Products (EBooks/Print On Demand etc)) | 45.8333333 | 25.0000000 |
| Service Business - Become a Virtual Assistant | 29.9950000 | 21.9850000 |
| Service Business - Rent out room (e.g. Airbnb) | 18.3175000 | 18.3175000 |
| Service Business - Sales Representative/Advisor | 18.7400000 | 18.0000000 |
| Ecommerce & Consumer - Sell belongings on marketplaces (e.g. Ebay, Craigslist, Etsy) | 9.1636667 | 9.1636667 |
| Software & Tech - Create a software | 1.4850000 | 1.4850000 |
| Service Business - Vending Machine | 0.6703125 | 0.6703125 |
The six business types at this level with n>5 also show a wide range of earnings.
tt_hourlycorrs_bus2 %>% group_by(tt_video_id,idea) %>%
distinct(tt_video_id,idea,business_type_level_2,hourly_earn) %>% ungroup %>% add_count(business_type_level_2) %>% filter(n>5) %>%
ggplot()+
geom_sina(aes(x=str_wrap(business_type_level_2,33),
y=hourly_earn,color=business_type_level_2))+
scale_color_discrete(guide="none")+labs(x="Business Type (level 1)")+
coord_flip()+labs(x="Business Type Level 2")ytbus1earn <-
yt_hourlycorrs_bus1 %>% group_by(yt_video_id,idea) %>%
distinct(yt_video_id,idea,business_type_level_1,hourly_earn) %>% ungroup %>% mutate(source="YouTube")
ttbus1earn <-
tt_hourlycorrs_bus1 %>% group_by(tt_video_id,idea) %>%
distinct(tt_video_id,idea,business_type_level_1,hourly_earn) %>% ungroup %>% mutate(source="TikTok")
bind_rows(ytbus1earn,ttbus1earn) %>% add_count(business_type_level_1) %>%
ggplot()+
geom_sina(aes(x=fct_reorder(business_type_level_1,n),
y=hourly_earn,fill=business_type_level_1, ),color="black",pch=21,size=3)+
scale_fill_manual(values= c("#0060df","#003eaa","#46B100","#46A5FF","#F3B71F"), guide="none")+labs(x="Business Type",
y="Hourly earnings (USD)")+
coord_flip()+theme(text = element_text(size=25))On YouTube, these three videos (from three different creators) report the highest earnings.
yt_hourlycorrs_chp %>% slice_max(hourly_earn,n = 3) %>% select(video_url,title,author,earnings_timeframe,earn,hourly_earn) %>% gt()| video_url | title | author | earnings_timeframe | earn | hourly_earn |
|---|---|---|---|---|---|
| https://www.youtube.com/watch?v=h6C0Dq_wcJ0 | FREE Bot Pays You $30.00 Per Minute in Passive Income [Make Money Online] | Online Hustle | Minutes | 30 | 1800 |
| https://www.youtube.com/watch?v=1_XD-J0u5E8 | NEW $7000/Day Copy Paste Website Pays Beginners! (Make Money Online) | Online Hustle | Days | 7000 | 875 |
| https://www.youtube.com/watch?v=YDZ3M0Az8BU | 7 Ways To Make Your First $100,000 Online | Iman Gadzhi | Months | 150000 | 750 |
On TikTok, these three videos (from three different creators) report the highest earnings.
tt_hourlycorrs_chp %>% slice_max(hourly_earn,n = 3) %>% select(web_video_url,text,author_meta_nick_name,earnings_timeframe,earn,hourly_earn) %>% gt()| web_video_url | text | author_meta_nick_name | earnings_timeframe | earn | hourly_earn |
|---|---|---|---|---|---|
| https://www.tiktok.com/@adamenfroy/video/7132196947120753963 | The most underrated #sidehustle? #makemoneyonline #blogging #affiliatemarketing | adamenfroy | Months | 300000 | 1500.0 |
| https://www.tiktok.com/@lukeisshorts/video/7119011507559664942 | Anybody can do it #investor #passiveincome #makemoneyfromhome | Luke Robins | Months | 184500 | 922.5 |
| https://www.tiktok.com/@ecomjoshcarter/video/6924254455114370305 | I QUIT MY 9-5... #ebay #dropshipping #ecom #makemoney | ecomjoshcarter | Days | 7000 | 875.0 |
In particular, author “Adam Enfroy” appears on both of these platforms reporting 100-300k USD in monthly earnings.
yt_hourlycorrs_chp %>% slice_min(hourly_earn,n = 3) %>% select(video_url,title,author,earnings_timeframe,earn,hourly_earn) %>% gt()| video_url | title | author | earnings_timeframe | earn | hourly_earn |
|---|---|---|---|---|---|
| https://www.youtube.com/watch?v=WGxKjgy7R4g | Stupid App Gives $500 To Beginners Who DO NOTHING [Make Money Online] | Online Hustle | Months | 15 | 0.0750000 |
| https://www.youtube.com/watch?v=_bUsIgEFTTc | 7.5 Passive Income Ideas To Easily Make $500/Day | Vincent Chan | Years | 500 | 0.2083333 |
| https://www.youtube.com/watch?v=N7LFPNYgMOI | Best 10 Apps That Pay You Real Money | Make Money Online | Mr Reis | Months | 55 | 0.2750000 |
tt_hourlycorrs_chp %>% slice_min(hourly_earn,n = 3) %>% select(web_video_url,text,author_meta_nick_name,earnings_timeframe,earn,hourly_earn) %>% gt()| web_video_url | text | author_meta_nick_name | earnings_timeframe | earn | hourly_earn |
|---|---|---|---|---|---|
| https://www.tiktok.com/@pristinevending/video/7112240282468388138 | How much did this machine make after 2 months? #vendingmachinebusiness #vendingmachine #sidehustle #sidehustleideas #drinkmachine | PristineVending | Months | 18.375000 | 0.0918750 |
| https://www.tiktok.com/@ebrahim_ka/video/6875602063842086146 | 5 ways to make money at home #howtomakemoney #makemoney #moneyathome #dubai | Ebrahim | Months | 52.000000 | 0.2600000 |
| https://www.tiktok.com/@mattlorion/video/6840985541836623109 | Turning $0 into $100,000 (Part 1) #fyp #entrepreneur #makemoney #flipping | Matt Lorion | Days | 2.618667 | 0.3273333 |
Different money-making ideas on both platforms varied in the number of skills needed to generate earnings. For the most part, each idea needed only one or two different skills, and this was more evident on TikTok (much shorter videos with generally only one money-making idea).
skills_per_idea_yt <-
youtube_data_activities %>% select(yt_video_id,idea,skills_required) %>%
group_by(yt_video_id,idea) %>% distinct() %>%
summarise(n_skills=n()) %>% ungroup() %>% mutate(source="YouTube") %>%
select(n_skills,source)
skills_per_idea_tt <-
tiktok_data_activities %>% select(tt_video_id,idea,skills_required) %>%
group_by(tt_video_id,idea) %>% distinct() %>%
summarise(n_skills=n()) %>% ungroup()%>% mutate(source="TikTok") %>%
select(n_skills,source)
bind_rows(skills_per_idea_tt,skills_per_idea_yt) %>%
ggplot()+
geom_histogram(aes(x=n_skills,fill=source))The makeup of required skills also varied across platforms.
On YouTube, the most mentioned skill was Marketing followed by Image or Video Editing.
youtube_data_activities %>% select(yt_video_id,idea,skills_required) %>% distinct() %>% tabyl(skills_required) %>% arrange(-n) %>% mutate(percent=round(valid_percent,2)) %>% select(-valid_percent) %>% na.omit %>% gt() %>% tab_header(title = "YouTube")| YouTube | ||
|---|---|---|
| skills_required | n | percent |
| Marketing | 152 | 0.24 |
| Image Editing | 83 | 0.13 |
| Video Editing | 81 | 0.13 |
| Writing | 70 | 0.11 |
| Investing | 69 | 0.11 |
| Doing mircowork for businesses | 67 | 0.11 |
| Speaking | 54 | 0.09 |
| Web Development | 29 | 0.05 |
| Programming | 22 | 0.03 |
| Providing Home Services | 3 | 0.00 |
| Gaming | 2 | 0.00 |
When an idea or video required two or more skills, the most common combinations were Speaking + Video Editing, Investing + Marketing, Speaking + Writing, and Image + Video Editing.
youtube_data_activities %>% select(yt_video_id,idea,skills_required) %>%
group_by(yt_video_id,idea) %>%
arrange(yt_video_id, skills_required) %>%
summarize(combination = paste0(skills_required, collapse = " - "), .groups = "drop") %>%
count(combination) %>%
filter(str_detect(combination," - ")) %>% arrange(-n) %>% slice(1:10) %>%
gt() %>% tab_header("YouTube Data", subtitle="Combinations of >2 skills, Top 10 most common combinations shown")| YouTube Data | |
|---|---|
| Combinations of >2 skills, Top 10 most common combinations shown | |
| combination | n |
| Speaking - Video Editing | 13 |
| Investing - Marketing | 12 |
| Speaking - Writing | 12 |
| Image Editing - Video Editing | 7 |
| Image Editing - Marketing | 6 |
| Image Editing - Marketing - Programming - Video Editing - Web Development - Writing | 6 |
| Image Editing - Investing | 5 |
| Image Editing - Speaking - Video Editing | 4 |
| Image Editing - Speaking - Video Editing - Writing | 4 |
| Investing - Investing | 4 |
TikTok videos favored Investing, followed by Marketing and Writing.
tiktok_data_activities %>% group_by(tt_video_id,idea) %>% tabyl(skills_required) %>% arrange(-n) %>% mutate(percent=round(valid_percent,2)) %>% select(-valid_percent) %>% na.omit %>% gt() %>% tab_header(title = "TikTok")| TikTok | ||
|---|---|---|
| skills_required | n | percent |
| Investing | 73 | 0.26 |
| Marketing | 57 | 0.20 |
| Writing | 37 | 0.13 |
| Image Editing | 33 | 0.12 |
| Doing mircowork for businesses | 32 | 0.11 |
| Video Editing | 21 | 0.07 |
| Providing Home Services | 9 | 0.03 |
| Speaking | 8 | 0.03 |
| Web Development | 8 | 0.03 |
| Programming | 5 | 0.02 |
| Gaming | 1 | 0.00 |
Despite the fewer videos that required >1 skill, there were some frequently mentioned skill combinations such as Investing + Marketing, followed by Marketing+Programming+Video Editing+Web Development+Writing.
tiktok_data_activities %>% select(tt_video_id,idea,skills_required) %>%
group_by(tt_video_id,idea) %>%
arrange(tt_video_id, skills_required) %>%
summarize(combination = paste0(skills_required, collapse = " - "), .groups = "drop") %>%
count(combination) %>%
filter(str_detect(combination," - ")) %>% arrange(-n) %>% slice(1:10) %>%
gt() %>% tab_header("TikTok Data", subtitle="Combinations of >2 skills, Top 10 most common combinations shown")| TikTok Data | |
|---|---|
| Combinations of >2 skills, Top 10 most common combinations shown | |
| combination | n |
| Investing - Marketing | 9 |
| Marketing - Programming - Video Editing - Web Development - Writing | 4 |
| Doing mircowork for businesses - Doing mircowork for businesses - Writing - Writing | 3 |
| Doing mircowork for businesses - Speaking | 3 |
| Image Editing - Image Editing | 3 |
| Marketing - Marketing | 3 |
| Doing mircowork for businesses - Writing | 2 |
| Image Editing - Image Editing - Image Editing - Image Editing - Investing - Investing - Investing - Investing | 2 |
| Image Editing - Image Editing - Investing - Investing - Marketing - Marketing | 2 |
| Image Editing - Image Editing - Marketing - Marketing | 2 |
Across both platforms, the Skill with the highest mean standardized earnings (hourly) was Investing, followed by Marketing, Video Editing, and Writing.
ttskillearn <-
tt_hourlycorrs %>% select(tt_video_id,idea,hourly_earn,skills_required) %>% group_by(tt_video_id,idea) %>% distinct() %>%
group_by(skills_required) %>%
summarise(mean_earn=mean(hourly_earn),
median_earn=median(hourly_earn)) %>%
arrange(-mean_earn)
ytskillearn <-
yt_hourlycorrs %>% select(yt_video_id,idea,hourly_earn,skills_required) %>% group_by(yt_video_id,idea) %>% distinct() %>%
group_by(skills_required) %>%
summarise(mean_earn=mean(hourly_earn),
median_earn=median(hourly_earn)) %>%
arrange(-mean_earn)
ytskillearns <-
yt_hourlycorrs %>% select(yt_video_id,idea,hourly_earn,skills_required) %>% group_by(yt_video_id,idea) %>% distinct() %>%
ungroup %>% mutate(source="YouTube")
ttskillearns <-
tt_hourlycorrs %>% select(tt_video_id,idea,hourly_earn,skills_required) %>% group_by(tt_video_id,idea) %>% distinct() %>%
ungroup %>% mutate(source="TikTok")
bothearn <- bind_rows(ytskillearns,ttskillearns) %>% group_by(skills_required) %>%
summarise(mean_earn=mean(hourly_earn),
median_earn=median(hourly_earn)) %>%
arrange(-mean_earn)
gt(bothearn) %>% tab_header(title='all platforms',subtitle = "earnings by skill")| all platforms | ||
|---|---|---|
| earnings by skill | ||
| skills_required | mean_earn | median_earn |
| Marketing | 162.62975 | 62.50000 |
| Investing | 144.76703 | 55.55556 |
| Video Editing | 140.88650 | 50.00000 |
| Doing mircowork for businesses | 135.10391 | 46.50000 |
| Writing | 131.10958 | 25.99250 |
| Speaking | 100.22660 | 25.00000 |
| Providing Home Services | 40.89468 | 27.96875 |
| Image Editing | 40.24922 | 25.00000 |
| Programming | 38.74833 | 40.00000 |
| Web Development | 37.03615 | 37.50000 |
| Gaming | 36.00000 | 36.00000 |
By platform, differences in reported earnings appear. On YouTube, Marketing and Investing report the highest mean earnings. On TikTok, Video Editing and Investing report the highest earnings.
gt(ytskillearn)%>% tab_header(title='YouTube',subtitle = "earnings by skill")| YouTube | ||
|---|---|---|
| earnings by skill | ||
| skills_required | mean_earn | median_earn |
| Marketing | 146.43900 | 62.50000 |
| Writing | 138.97708 | 25.00000 |
| Doing mircowork for businesses | 109.94519 | 45.00000 |
| Investing | 107.55416 | 40.00000 |
| Video Editing | 80.54883 | 52.05250 |
| Speaking | 47.41574 | 20.00000 |
| Web Development | 43.12500 | 38.75000 |
| Programming | 42.75000 | 40.00000 |
| Image Editing | 36.72155 | 21.04167 |
| Providing Home Services | 18.69444 | 19.83333 |
gt(ttskillearn) %>% tab_header(title='TikTok',subtitle = "earnings by skill")| TikTok | ||
|---|---|---|
| earnings by skill | ||
| skills_required | mean_earn | median_earn |
| Video Editing | 364.99786 | 37.5000 |
| Speaking | 241.05556 | 237.5000 |
| Marketing | 205.80511 | 50.0000 |
| Doing mircowork for businesses | 170.85579 | 48.0000 |
| Investing | 169.57562 | 100.0000 |
| Writing | 121.66858 | 25.9925 |
| Image Editing | 55.06547 | 50.0000 |
| Providing Home Services | 51.99479 | 45.0000 |
| Gaming | 36.00000 | 36.0000 |
| Programming | 33.74625 | 33.4925 |
| Web Development | 27.29400 | 21.9850 |
Additionally, TikTok videos report significantly higher earnings for the same skills compared with YouTube videos.
ytskearnsummary <- ytskillearn %>% mutate(source="YouTube")
ttskearnsummary <- ttskillearn %>% mutate(source="TikTok")
bind_rows(ytskearnsummary,ttskearnsummary) %>%
ggplot()+
geom_bar(aes(x=fct_reorder(skills_required,mean_earn),y=mean_earn,fill=source),stat="identity",position = "dodge")+coord_flip()+labs(x='skill')The variation and spread of earnings by skill is consistent across platforms.
bind_rows(ytskillearns,ttskillearns) %>%
ggplot(aes(x=fct_reorder(skills_required,hourly_earn),y=hourly_earn,color=source))+
geom_sina()+labs(x='skill')+
coord_flip()More required skills did not relate with greater earnings.
# skills_per_idea_ytid <-
# youtube_data_activities %>% select(yt_video_id,idea,skills_required) %>%
# group_by(yt_video_id,idea) %>% distinct() %>%
# summarise(n_skills=n()) %>% ungroup() %>% mutate(source="YouTube")
#
# skills_per_idea_ttid <-
# tiktok_data_activities %>% select(tt_video_id,idea,skills_required) %>%
# group_by(tt_video_id,idea) %>% distinct() %>%
# summarise(n_skills=n()) %>% ungroup()%>% mutate(source="TikTok")
skillsearntt <- left_join(skills_per_idea_tt,tt_hourly_earn)
skillsearnyt <- left_join(skills_per_idea_yt,yt_hourly_earn)
bind_rows(skillsearntt,skillsearnyt) %>%
ggplot(aes(x=factor(n_skills),hourly_earn))+geom_boxplot()+
labs(x="number of skills required per money-making idea",y="standardized earnings")In general, the video titles vary considerably across platforms in terms of length, content and style.
tiktok_data <- tiktok_data %>% mutate(title_noHash=str_extract(text,"^[^#]*"))
yt_tlength <- round(mean(str_length(youtube_data$title)),0)
tt_tlength <-round(mean(str_length(tiktok_data$text)))
tt_tlength_nh <-round(mean(str_length(tiktok_data$title_noHash)))Without various trailing hashtags, YouTube video titles are on average, twice as long as TikTok titles (65 vs. 31 characters). Overall, roughly a third of the length of TikTok titles comprises various hashtags.
The words and bigrams (consecutive sequences of two words) that appear most frequently in the video’s titles vary significantly between platforms.
# tokenize
stopwords <- c("for","in","a","the","to","with","from","by")
title_words_yt <- youtube_data %>% unnest_tokens(title_wrd,title,token = "words") %>%
filter(!title_wrd %in% stopwords)
title_bigrams_yt <- youtube_data %>% unnest_tokens(title_bg,title,token = "ngrams",n=2)
title_words_tt <- tiktok_data %>% unnest_tokens(title_wrd,text,token = "tweets") %>%
filter(!title_wrd %in% stopwords)
title_bigrams_tt <- title_words_tt %>% mutate(nextwrdbg=lead(title_wrd)) %>%
unite(title_bg, title_wrd, nextwrdbg, sep = ' ')
wordsyt <- title_words_yt %>% count(title_wrd) %>% slice_max(n,n=15) %>% mutate(source="YouTube")
wordstt <- title_words_tt %>% count(title_wrd) %>% slice_max(n,n=15) %>% mutate(source="TikTok")
bg_yt <- title_bigrams_yt %>% count(title_bg) %>% slice_max(n,n=15) %>% mutate(source="YouTube")
bg_tt <- title_bigrams_tt %>% count(title_bg) %>% slice_max(n,n=15) %>% mutate(source="TikTok")
top15wrds <- bind_rows(wordsyt,wordstt)
top15bg <- bind_rows(bg_yt,bg_tt)
ggplot(top15wrds)+
geom_lollipop(aes(x=fct_reorder(title_wrd,n),y=n))+
facet_wrap(~source)+labs(x="word or hashtag",y='occurrences')+
coord_flip()ggplot(top15bg)+
geom_lollipop(aes(x=fct_reorder(title_bg,n),y=n))+
facet_wrap(~source)+labs(x="bigram",y='occurrences')+
coord_flip()Considering the top 15 words or bigrams, there is little overlap between platforms.